library(tidyverse)
library(data.table)
library(igraph)
library(tidygraph)
library(Matrix)
library(bigalgebra)

source('code/estimation_functions.R')

#### Generate Different Scores ####
# This section generates different sets of estimates:
# 1. Seeded with Bonica
# 2. Seeded with Crosson
# 3. Seeded with Hansford
# 4. Seeded with Barbera
# 5. Seeded with the rowwise mean of Crosson, Hansford, and Barbera
# 6. Seeded with the rowwise mean of Bonica, Crosson, Hansford, and Barbera

set.seed(12345)

load("data/bonica_ses.RData")
bonica_ses = out5 

g = make_orgs_and_briefs_graph()


#get rid of exog orgs that aren't on the graph
vertices = g %>% activate(nodes)  %>% pull(name)
bonica_ses = out5  %>%
  filter(amicus %in% vertices)

## Generate the Bonica-based scores
M = make_transition_matrix(g,bonica_ses$amicus,'Bonica')
bonica_scores_full = estimate_ideology_without_steady_state(M, exog = bonica_ses$amicus,
                                                   score_vector = bonica_ses$score)

## Generate the weighted Bonica-based scores
org_weights = get_centrality_weights_from_org_graph(centrality_eigen)
M_weighted = make_weighted_transition_matrix(g,exog = bonica_ses$amicus,org_weights)
bonica_weighted_scores_full = estimate_ideology_without_steady_state(M_weighted, exog = bonica_ses$amicus,
                                                              score_vector = bonica_ses$score)

## Generate the estimates seeded by previous scores
more_scores = read.csv("data/crosson_hansford_barbera_walk.csv",row.names = 1) %>% 
  left_join( 
    read.csv("data/acnet_Vertices.csv") %>%
      transmute(orgId,name=preprocess_text(properName)) %>%
      as_tibble(),
    by=c("orgID"="orgId")) %>%
  filter(!is.na(name))
more_scores = more_scores[!duplicated(more_scores$name),]

hansford.exog.orgs = more_scores %>% filter(!is.na(hansford)) %>% pull(name)
hansford.exog.scores = more_scores %>% filter(!is.na(hansford)) %>% pull(hansford)

M = make_transition_matrix(g,hansford.exog.orgs,'Hansford')
hansford_scores_full = estimate_ideology_without_steady_state(M, exog = hansford.exog.orgs,
                                                            score_vector = hansford.exog.scores)

crosson.exog.orgs = more_scores %>% filter(!is.na(crosson)) %>% pull(name)
crosson.exog.scores = more_scores %>% filter(!is.na(crosson)) %>% pull(crosson)

M = make_transition_matrix(g,crosson.exog.orgs,'Crosson')
crosson_scores_full = estimate_ideology_without_steady_state(M, exog = crosson.exog.orgs,
                                                              score_vector = crosson.exog.scores)

barbera.exog.orgs = more_scores %>% filter(!is.na(barbera)) %>% pull(name)
barbera.exog.scores = more_scores %>% filter(!is.na(barbera)) %>% pull(barbera)

M = make_transition_matrix(g,barbera.exog.orgs,'Barbera')
barbera_scores_full = estimate_ideology_without_steady_state(M, exog = barbera.exog.orgs,
                                                              score_vector = barbera.exog.scores)

avg.exog.orgs = more_scores %>% filter(!is.na(avg)) %>% pull(name)
avg.exog.scores = more_scores %>% filter(!is.na(avg)) %>% pull(avg)

M = make_transition_matrix(g,avg.exog.orgs,'Average')
avg_scores_full = estimate_ideology_without_steady_state(M, exog = avg.exog.orgs,
                                                              score_vector = avg.exog.scores)

avg2.exog.orgs = more_scores %>% filter(!is.na(avg2)) %>% pull(name)
avg2.exog.scores = more_scores %>% filter(!is.na(avg2)) %>% pull(avg2)

M = make_transition_matrix(g,avg2.exog.orgs)
avg2_scores_full = estimate_ideology_without_steady_state(M, exog = avg2.exog.orgs,
                                                         score_vector = avg2.exog.scores)

out = list(data.frame(orgname = names(as.matrix(bonica_scores_full)[,1]),
                      bonica_score = as.matrix(bonica_scores_full)[,1]),
           data.frame(orgname = names(as.matrix(hansford_scores_full)[,1]),
                      hansford_score = as.matrix(hansford_scores_full)[,1]),
           data.frame(orgname = names(as.matrix(crosson_scores_full)[,1]),
                      crosson_score = as.matrix(crosson_scores_full)[,1]),
           data.frame(orgname = names(as.matrix(barbera_scores_full)[,1]),
                      barbera_score = as.matrix(barbera_scores_full)[,1]),
           data.frame(orgname = names(as.matrix(avg_scores_full)[,1]),
                      avg_score = as.matrix(avg_scores_full)[,1]),
           data.frame(orgname = names(as.matrix(avg2_scores_full)[,1]),
                      avg2_score = as.matrix(avg2_scores_full)[,1]))

out = Reduce(function(...) merge(..., all=T), out)


#### Leave-one-out Cross-validation ####
source('code/estimation_functions.R')

set.seed(12345)

load("data/bonica_ses.RData")
g = make_orgs_and_briefs_graph()

#get rid of exog orgs that aren't on the graph
vertices = g %>% activate(nodes) %>% pull(name)
bonica_ses = out5  %>%
  filter(amicus %in% vertices)

#bonica_weighted_loo = loo_crossval(graph = g,
#                                   exog = bonica_ses$amicus,
#                                   score_vector = bonica_ses$score,
#                                   weights = TRUE)


bonica_loo = loo_crossval(graph = g,
                          exog = bonica_ses$amicus,
                          score_vector = bonica_ses$score)

crosson_loo = loo_crossval(graph = g,
                                   exog = crosson.exog.orgs,
                                   score_vector = crosson.exog.scores)

hansford_loo = loo_crossval(graph = g,
                                    exog = hansford.exog.orgs,
                                    score_vector = hansford.exog.scores)

barbera_loo = loo_crossval(graph = g,
                                     exog = barbera.exog.orgs,
                                     score_vector = barbera.exog.scores)

avg_loo = loo_crossval(graph = g,
                                     exog = avg.exog.orgs,
                                     score_vector = avg.exog.scores)

avg2_loo = loo_crossval(graph = g,
                                exog = avg2.exog.orgs,
                                score_vector = avg2.exog.scores)


save.image("data/scores_crossvals.RData")

#### Bootstrapped SEs ####
# This section calculates bootstrapped standard errors
  # from the scores seeded by Bonica

set.seed(12345)

load("data/bonica_ses.RData")
bonica_ses = out5 

g = make_orgs_and_briefs_graph()
#get rid of exog orgs that aren't on the graph
vertices = g %>% activate(nodes) %>% pull(name)
bonica_ses = out5  %>%
  filter(amicus %in% vertices)

bootstrap_out = list()
for(i in 1:2000){
  #for(i in 1:20){ ## for testing
  
  M = make_transition_matrix(g,bonica_ses$amicus)
  
  bonica_newscores = rnorm(nrow(bonica_ses), mean = bonica_ses$score,
                           sd = bonica_ses$se)
  
  p = estimate_ideology_without_steady_state(M, exog = bonica_ses$amicus,
                                                     score_vector = bonica_newscores)
  #p_canonical_order = as.matrix(p)[canonical_row_order,1]
  bootstrap_out[[i]] = as.matrix(p)[,1]
  print(i)
}

bootstrap_scores = as.data.frame(do.call(cbind, bootstrap_out))
save(bootstrap_scores, file="data/bootstrap_results.RData")

bootstrap_ses = matrixStats::rowSds(as.matrix(bootstrap_scores))
summary(bootstrap_ses)

bootstrap_ses = data.frame(orgname = rownames(bootstrap_scores),
                           bonica_ses = bootstrap_ses)

tmp = out %>% left_join(bootstrap_ses, by="orgname") %>%
  select(bonica_score, bonica_ses) %>%
  arrange(desc(bonica_ses))
head(tmp, 20)

ggplot(tmp) + 
  geom_density(aes(x=bonica_ses), fill="steelblue1") +
  theme_bw() + 
  xlab("ACNet SEs") + ylab("")
ggsave("figures/se_density.pdf")

#### Combining, Outputting, Evaluating ####

load("data/scores_crossvals.RData")
load("data/bootstrap_results.RData")

library(ggcorrplot)

cormat = out[,-1]
cormat = cormat[complete.cases(cormat),]
cormat = cor(cormat)
rownames(cormat) = colnames(cormat) = c("1. Bonica", "2. Hansford", "3. Crosson", "4. Barbera",
                                        "5. Average 2-4", "6. Average 1-4")
ggcorrplot(cormat, type="full",
           ggtheme = ggplot2::theme_bw)
ggsave("figures/measures_corplot.pdf")

cormat2 = cormat[-c(1,6),-c(1,6)]

summary(unlist(cormat2[upper.tri(cormat2)]))
mean(cormat[2:5,1]) # 0.632


## How many orgs/briefs are scorable?
sapply(2:7, FUN=function(x) sum(out[,x]!=0, na.rm=T))
# [1] 24280 22203 22130 22141 22593 24569

mad(bonica_loo[,1]-bonica_loo[,2], na.rm=T) # 0.675
mean(sign(bonica_loo[,1])==sign(bonica_loo[,2]), na.rm=T) # 0.686

idx_tmp = abs(bonica_loo$truth)>0.75
mean(sign(bonica_loo[idx_tmp,1])==sign(bonica_loo[idx_tmp,2]), na.rm=T) # 0.781



mad(crosson_loo[,1]-crosson_loo[,2], na.rm=T) # 0.448
mean(sign(crosson_loo[,1])==sign(crosson_loo[,2]), na.rm=T) # 0.884

mad(hansford_loo[,1]-hansford_loo[,2],  na.rm=T) # 0.152
mean(sign(hansford_loo[,1])==sign(hansford_loo[,2]), na.rm=T) # 0.941

mad(barbera_loo[,1]-barbera_loo[,2], na.rm=T) # 0.653
mean(sign(barbera_loo[,1])==sign(barbera_loo[,2]), na.rm=T) # 0.877

mad(avg_loo[,1]-avg_loo[,2], na.rm=T) # 0.478
mean(sign(avg_loo[,1])==sign(avg_loo[,2]), na.rm=T) # 0.873

mad(avg2_loo[,1]-avg2_loo[,2], na.rm=T) # 0.596
mean(sign(avg2_loo[,1])==sign(avg2_loo[,2]), na.rm=T) # 0.742


## Disagreements table
bonica_disagree = bonica_loo %>% mutate(org = bonica_ses$amicus,
                                        dif = abs(crossval_ests-truth)) %>%
  arrange(desc(dif)) %>% 
  mutate(ACNet = crossval_ests, DIME = truth, org = toupper(org)) %>%
  select(org, ACNet, DIME) %>% head(25)
print(xtable::xtable(bonica_disagree),include.rownames=FALSE, file="tables/bonica_disagree.tex")


## This is not in the paper but maybe should go in later
crosson_disagree = out %>% filter(!is.na(crosson_score)) %>%
  select(orgname, bonica_score, crosson_score) %>%
  mutate(dif = abs(bonica_score-crosson_score)) %>%
  arrange(desc(dif)) %>% head(25)
print(xtable::xtable(bonica_disagree),include.rownames=FALSE, file="tables/crosson_disagree.tex")



## Crossval comparisons
set.seed(12345)

bonica_crosson = loo_crossval(graph = g,
                          exog = bonica_ses$amicus[bonica_ses$amicus %in% crosson.exog.orgs],
                          score_vector = bonica_ses$score[bonica_ses$amicus %in% crosson.exog.orgs])

crosson_bonica = loo_crossval(graph = g,
                           exog = crosson.exog.orgs[crosson.exog.orgs %in% bonica_ses$amicus],
                           score_vector = crosson.exog.scores[crosson.exog.orgs %in% bonica_ses$amicus])

bonica_hansford = loo_crossval(graph = g,
                              exog = bonica_ses$amicus[bonica_ses$amicus %in% hansford.exog.orgs],
                              score_vector = bonica_ses$score[bonica_ses$amicus %in% hansford.exog.orgs])

hansford_bonica = loo_crossval(graph = g,
                              exog = hansford.exog.orgs[hansford.exog.orgs %in% bonica_ses$amicus],
                              score_vector = hansford.exog.scores[hansford.exog.orgs %in% bonica_ses$amicus])

bonica_barbera = loo_crossval(graph = g,
                              exog = bonica_ses$amicus[bonica_ses$amicus %in% barbera.exog.orgs],
                              score_vector = bonica_ses$score[bonica_ses$amicus %in% barbera.exog.orgs])

barbera_bonica = loo_crossval(graph = g,
                              exog = barbera.exog.orgs[barbera.exog.orgs %in% bonica_ses$amicus],
                              score_vector = barbera.exog.scores[barbera.exog.orgs %in% bonica_ses$amicus])

bonica_avg = loo_crossval(graph = g,
                              exog = bonica_ses$amicus[bonica_ses$amicus %in% avg.exog.orgs],
                              score_vector = bonica_ses$score[bonica_ses$amicus %in% avg.exog.orgs])

avg_bonica = loo_crossval(graph = g,
                              exog = avg.exog.orgs[avg.exog.orgs %in% bonica_ses$amicus],
                              score_vector = avg.exog.scores[avg.exog.orgs %in% bonica_ses$amicus])

bonica_avg2 = loo_crossval(graph = g,
                              exog = bonica_ses$amicus[bonica_ses$amicus %in% avg2.exog.orgs],
                              score_vector = bonica_ses$score[bonica_ses$amicus %in% avg2.exog.orgs])

avg2_bonica = loo_crossval(graph = g,
                              exog = avg2.exog.orgs[avg2.exog.orgs %in% bonica_ses$amicus],
                              score_vector = avg2.exog.scores[avg2.exog.orgs %in% bonica_ses$amicus])


crossvalstats = function(crossval_out, bonica_crossval){
  r1 = mad(crossval_out[,1]-crossval_out[,2], na.rm=T)
  r2 = mean(sign(crossval_out[,1])==sign(crossval_out[,2]), na.rm=T)
  
  r3 = mad(bonica_crossval[,1]-bonica_crossval[,2], na.rm=T)
  r4 = mean(sign(bonica_crossval[,1])==sign(bonica_crossval[,2]), na.rm=T)
  out = c(nrow(crossval_out), r1, r3, r2, r4)
  names(out) = c('Orgs', 'MAD', 'DIME MAD', 'Side Agreement', 'DIME Side Agreement')
  return(out)
}

crossval_results = rbind(
  crossvalstats(crosson_bonica, bonica_crosson),
  crossvalstats(hansford_bonica, bonica_hansford),
  crossvalstats(barbera_bonica, bonica_barbera),
  crossvalstats(avg_bonica, bonica_avg)
)

print(xtable::xtable(crossval_results),include.rownames=FALSE, file="tables/crossval_results.tex")


### Produce and output final scores

vertices = read.csv("data/acnet_Vertices.csv") %>%
  rename(orgID=orgId) %>%
  transmute(orgID,name=preprocess_text(properName)) %>%
  as_tibble()

briefs = read.csv("data/acnet_Briefs.csv") %>%
  rename(orgID=briefs) %>%
  transmute(orgID,name=orgID) %>%
  as_tibble()

vertices = rbind(vertices, briefs)

load("data/bonica_ses.RData")


## Take the "out" object
acnet_final = out %>%
  left_join(bonica_loo %>% mutate(orgname = bonica_ses$amicus)) %>%
  mutate(acnet_score = bonica_score,
         acnet_score = fifelse(acnet_score==crossval_ests,
                               yes= bonica_score, no = crossval_ests),
         acnet_score = fifelse(acnet_score==0 & bonica_score != 0,
                               yes= bonica_score, no= acnet_score),
         acnet_score = fifelse(is.na(acnet_score), 
                               yes = bonica_score, no = acnet_score),
         DIME = truth) %>%
  mutate(acnet_score_avg = avg2_score,
         acnet_score_avg = fifelse(acnet_score_avg==crossval_ests,
                               yes= avg2_score, no = crossval_ests),
         acnet_score_avg = fifelse(acnet_score_avg==0 & avg2_score != 0,
                               yes= avg2_score, no= acnet_score_avg),
         acnet_score_avg = fifelse(is.na(acnet_score_avg), 
                               yes = avg2_score, no = acnet_score)) %>%
  select(-bonica_score, -avg_score, -avg2_score, -crossval_ests, -truth) %>%
  relocate(orgname, acnet_score, DIME) %>%
  full_join(bonica_ses, by = c("orgname"="amicus")) %>%
  select(-n, -num, -score,-orgID) %>%
  arrange(orgname, acnet_score, acnet_score_avg, se, DIME) %>%
  left_join(vertices, by=c("orgname" = "name"))

acnet_final = acnet_final %>%
  mutate( brief = grepl(x = acnet_final$orgname, pattern = "B-[[:digit:]]+"),
          exogenous = orgID %in% out5$orgID)

save(acnet_final, file="data/acnet_scores_5_13_22.RData")

acnet_final %>% mutate(missing_score = is.na(acnet_score)) %>%
  group_by(brief, missing_score) %>%  summarize(n())
